---
title: "R3.Balance"
output: pdf_document
---

# Load Packages
```{r}
library(tidyverse)
library(estimatr)
```

# Load Data
```{r}
load("Data/data_final.Rdata")
```

# Set Covariates
```{r}
covs_list <- c("ln_PopDens_70", "Cities_10km", "ln_Confs_All", "PrRecH_Adm3", "Isolados1950", "PrWheatAr_1972", "LandIneqRat", "TerrainSlopeIndex", "MedAltitude", "Communist_1975", "MinDistPCP_Map")
```

# Summary Statistics (App. Table 1b)
```{r}

data_final_rescale1 <- data_final %>%
  mutate(MinDistPCP_Map = MinDistPCP_Map/1000)

data.sum <- data.frame(data_final_rescale1) %>%
                       select("CA1_CAR1U_MeanF",
                              "CaloricMean",
                              "pr_ha_ord",
                              "tx_Hi",
                              "ln_PopDens_70",
                              "Cities_10km",
                              "ln_Confs_All",
                              "PrRecH_Adm3",
                              "Isolados1950",
                              "PrWheatAr_1972",
                              "LandIneqRat",
                              "TerrainSlopeIndex",
                              "MedAltitude",
                              "log_gnr_events_Adm23", 
                              "Communist_1975", 
                              "MinDistPCP_Map")

stargazer::stargazer(data.sum,
                     covariate.labels = c("Points Index",
                                          "Mean Potential Caloric Yield",
                                          "% Hectares Expropriated",
                                          "Treatment",
                                          "Log(Population Density, 1970)",
                                          "Distance to Nearest City, 10Km",
                                          "Log(Social Conflicts, 1947-1962)" ,
                                          "% of Men Conscripted into Military, 1967" ,
                                          "% of Autonomous Farmers, 1950" ,
                                          "% of Land Committed to Wheat, 1972"  ,
                                          "Land Inequality Ratio, 1968",
                                          "Terrain Slope Index" ,
                                          "Median Altitude" ,
                                          "Log(GNR Events, 1976-83)", 
                                          "Communist Vote Share, 1975", 
                                          "Distance to Nearest PCP Office, Km"), 
                     iqr = T, type = "text")


```
# Rescale for Visualization
```{r}
data_final_rescale2 <- data_final %>%
  mutate(Communist_1975 = Communist_1975/10)
```

# Pooled Covariate Balance (Figure 6)
```{r}

output <- data.frame(covariates = covs_list, coef = NA, se = NA, n = NA)

for (i in 1:length(covs_list)){
  model <- lm_robust(as.formula(paste(covs_list[i], paste(c("tx_Hi", "as.factor(DistN)"), collapse=" + "), sep = " ~ ")), data = data_final_rescale2)
  output[i,"coef"]<-model$coefficients["tx_Hi"]
  output[i,"se"]<-model$std.error["tx_Hi"]
  output[i,"n"]<-length(model$fitted.values)
}

output_list <- output %>% 
  mutate(covnames = ifelse(covariates == "ln_PopDens_70", "Log(Population Density)", NA), 
         covnames = ifelse(covariates == "Cities_10km", "Distance to Nearest City", covnames), 
         covnames = ifelse(covariates == "ln_Confs_All", "Log(Social Conflicts)", covnames), 
         covnames = ifelse(covariates == "PrRecH_Adm3", "% of Men Conscripted into Military", covnames), 
         covnames = ifelse(covariates == "Isolados1950", "% of Autonomous Farmers", covnames), 
         covnames = ifelse(covariates == "PrWheatAr_1972", "% of Land Committed to Wheat", covnames), 
         covnames = ifelse(covariates == "LandIneqRat", "Land Inequality Ratio", covnames), 
         covnames = ifelse(covariates == "TerrainSlopeIndex", "Terrain Slope Index", covnames), 
         covnames = ifelse(covariates == "MedAltitude", "Median Altitude", covnames), 
         covnames = ifelse(covariates == "Communist_1975", "Communist Vote Share, 1975", covnames), 
         covnames = ifelse(covariates == "MinDistPCP_Map", "Distance to Nearest PCP Office", covnames))

output_list$covnames <- factor(output_list$covnames, 
                               levels = c("Log(Population Density)",
                                          "Distance to Nearest City",           
                                          "Log(Social Conflicts)",        
                                          "% of Men Conscripted into Military", 
                                          "% of Autonomous Farmers",            
                                          "% of Land Committed to Wheat",      
                                          "Land Inequality Ratio",              
                                          "Terrain Slope Index",                     
                                          "Median Altitude", 
                                          "Communist Vote Share, 1975", 
                                          "Distance to Nearest PCP Office"))       

ggplot(output_list, aes(x = reorder(covnames, desc(covnames)))) +
  geom_hline(aes(yintercept = 0), col = "red") +
  geom_point(aes(y = coef), position = position_dodge(width = 1)) + 
  geom_linerange(aes(ymin=coef - (qt(0.975, n)*se), ymax=coef + (qt(0.975, n)*se)), position = position_dodge(width = 1), size = 0.5) +
  geom_linerange(aes(ymin=coef - (qt(0.95, n)*se), ymax=coef + (qt(0.95, n)*se)), position = position_dodge(width = 1), size = 0.75) +
  coord_flip() +
  ylab("Treatment - Control Difference") + xlab("Covariates") +
  theme_light()

ggsave(plot = last_plot(),
             "Figures/fg6_BalancePool.pdf",
             width = 7, height = 2.5)

```

# Grouped Covariate Balance by Land Reform Intensity (App. Figure 1c)
```{r}
group_names <- data_final %>% select(contains("exp_cat2")) %>%
  mutate(exp_cat2_name = ifelse(exp_cat2 == 1, "LR < 50%", exp_cat2),
         exp_cat2_name = ifelse(exp_cat2 == 2, "50% < LR", exp_cat2_name),
         exp_cat2_name = ifelse(exp_cat2 == 0, "No LR", exp_cat2_name)) %>%
  unique()
```

```{r}
output <- list()
for (g in 0:2){ # g<-0
output[[g+1]] <- data.frame(covariates = covs_list, group = g, subset = "exp_cat2", coef = NA, se = NA, n = NA)
  for (i in 1:length(covs_list)){ # i<-1
    model <- lm_robust(as.formula(paste(covs_list[i], paste(c("tx_Hi", "as.factor(DistN)"), collapse=" + "), sep = " ~ ")), 
                            data = data_final_rescale2 %>% filter(exp_cat2 == g))
    output[[g+1]][i,"coef"]<-model$coefficients["tx_Hi"]
    output[[g+1]][i,"se"]<-model$std.error["tx_Hi"]
    output[[g+1]][i,"n"]<-length(model$fitted.values)
  }
}

output_list <- do.call(rbind, output)  %>% 
  mutate(covnames = ifelse(covariates == "ln_PopDens_70", "Log(Population Density)", NA), 
         covnames = ifelse(covariates == "Cities_10km", "Distance to Nearest City", covnames), 
         covnames = ifelse(covariates == "ln_Confs_All", "Log(Social Conflicts)", covnames), 
         covnames = ifelse(covariates == "PrRecH_Adm3", "% of Men Conscripted into Military", covnames), 
         covnames = ifelse(covariates == "Isolados1950", "% of Autonomous Farmers", covnames), 
         covnames = ifelse(covariates == "PrWheatAr_1972", "% of Land Committed to Wheat", covnames), 
         covnames = ifelse(covariates == "LandIneqRat", "Land Inequality Ratio", covnames), 
         covnames = ifelse(covariates == "TerrainSlopeIndex", "Terrain Slope Index", covnames), 
         covnames = ifelse(covariates == "MedAltitude", "Median Altitude", covnames), 
         covnames = ifelse(covariates == "Communist_1975", "Communist Vote Share, 1975", covnames), 
         covnames = ifelse(covariates == "MinDistPCP_Map", "Distance to Nearest PCP Office", covnames)) %>% 
  left_join(group_names %>% rename(group = exp_cat2))

output_list$covnames <- factor(output_list$covnames, 
                               levels = c("Log(Population Density)",
                                          "Distance to Nearest City",           
                                          "Log(Social Conflicts)",        
                                          "% of Men Conscripted into Military", 
                                          "% of Autonomous Farmers",            
                                          "% of Land Committed to Wheat",      
                                          "Land Inequality Ratio",              
                                          "Terrain Slope Index",                     
                                          "Median Altitude", 
                                          "Communist Vote Share, 1975", 
                                          "Distance to Nearest PCP Office"))       

ggplot(output_list, aes(x = reorder(covnames, desc(covnames)), col = exp_cat2_name, shape = exp_cat2_name)) +
  geom_hline(aes(yintercept = 0), col = "red") +
  geom_point(aes(y = coef), position = position_dodge(width = 1)) + 
  geom_linerange(aes(ymin=coef - (qt(0.975, n)*se), ymax=coef + (qt(0.975, n)*se)), position = position_dodge(width = 1), size = 0.5) +
  geom_linerange(aes(ymin=coef - (qt(0.95, n)*se), ymax=coef + (qt(0.95, n)*se)), position = position_dodge(width = 1), size = 0.75) +
  coord_flip() +
  ylab("Treatment - Control Difference") + xlab("Covariates") +
  scale_color_manual(name = "Land Reform Intensity", labels = c("No LR", "LR < 50%, or adjacent", "LR >= 50%"),
                     values = c("#252525", "#636363", "#969696"))+
  scale_shape_manual(name = "Land Reform Intensity", labels = c("No LR", "LR < 50%, or adjacent", "LR >= 50%"),
                       values = c(16, 17, 15)) +
  theme_light()

ggsave(plot = last_plot(),
             "Figures/Afg1c_BalanceGroup.pdf",
             width = 7, height = 3.5)

```